################################################
#### figure_3.R: Generates Figures 3 and S4 ####
################################################

# Respondent-level profile table built from `dta_overtime`: recodes the outcome,
# partisanship and demographic variables, then flags the skeptic subgroups that
# the figures below split on. The steps run in sequence, so later steps see the
# recoded `maga`, `pid` and `race` columns.
dta_profile = dta_overtime |>
  # --- Outcome, timing and partisanship ---
  mutate(
    accuracy_dgr = case_when(
      accuracy %in% 4:5 ~ 1,
      accuracy %in% 1:3 ~ 0, # Flipped
      .default = NA_integer_
    ),
    is_denier = accuracy_dgr,
    # TRUE for observations dated on or after 2022-11-08
    is_post = date >= as.Date("2022-11-08"),
    # Democrats are always coded 0; otherwise 1 only when the incoming
    # `maga` value equals 1
    maga = case_when(
      pid == "Democrat" ~ 0,
      maga == 1 ~ 1,
      .default = 0
    ),
    pid = as.factor(pid)
  ) |>
  # --- Demographics ---
  mutate(
    age = 2022 - birthyr,
    # Conditions are checked in order, so the second line only sees ages >= 40
    age_bin = case_when(
      age < 40 ~ "18-39",
      age < 65 ~ "40-64",
      age >= 65 ~ "65+"
    ),
    income_bin = case_match(
      faminc_new,
      c(1, 2, 3, 4, 5) ~ "< $50K",
      c(6, 7, 8, 9) ~ "$50-100K",
      c(10, 11, 12) ~ "$100-200K",
      c(13, 14, 15, 16) ~ "> $200K",
      .default = NA_character_
    ),
    sex = case_match(
      gender,
      1 ~ "Male",
      2 ~ "Female"
    ),
    race = case_match(
      race,
      1 ~ "White",
      2 ~ "Black",
      3 ~ "Hispanic",
      4 ~ "Asian",
      6 ~ "Two or More",
      c(5, 7, 8) ~ "Other"
    ),
    education = case_match(
      educ,
      c(1, 2) ~ "HS or Less",
      3 ~ "Some College",
      4 ~ "Associates",
      5 ~ "Bachelor",
      6 ~ "Post-Grad"
    ),
    # NOTE(review): the default also assigns "Never Married" to missing or
    # unlisted `marstat` codes -- confirm this is intended
    married = case_match(
      marstat,
      1 ~ "Married",
      2 ~ "Separated",
      3 ~ "Divorced",
      4 ~ "Widowed",
      c(5, 6) ~ "Never Married",
      .default = "Never Married"
    ),
    # NOTE(review): the default also assigns "Not in Labor Force" to missing or
    # unlisted `employ` codes -- confirm this is intended
    workforce = case_match(
      employ,
      c(1, 2, 3) ~ "In Labor Force",
      4 ~ "Unemployed",
      c(5, 6, 7, 8, 9) ~ "Not in Labor Force",
      .default = "Not in Labor Force"
    ),
    # Code 97 is set to missing (presumably a non-response code; verify
    # against the codebook)
    income = ifelse(faminc_new == 97, NA, faminc_new)
  ) |>
  # --- Define Groups ---
  mutate(
    maga_skeptic = is_denier == 1 & maga == 1,
    non_maga_skeptic = is_denier == 1 & maga == 0,
    r_skeptic = is_denier == 1 & pid == "Republican",
    d_skeptic = is_denier == 1 & pid == "Democrat",
    i_skeptic = is_denier == 1 & pid == "Independent"
  )

# Subgroup indicators (splits) and the demographic variables to tabulate.
vars_splits = c("is_denier", "maga_skeptic", "non_maga_skeptic")
vars = c(
  "race", "education", "married",
  "workforce", "sex", "income_bin", "age_bin"
)

# Every split crossed with every demographic variable; expand.grid() names the
# columns Var1 (split) and Var2 (demographic variable).
var_grid = expand.grid(vars_splits, vars)

# For each (split, variable) pair: drop rows missing either column, then
# estimate the weighted share of each demographic level within each value of
# the split. Results are stacked into one long table with standardised columns
# (group, group_level, var, var_level, prop and its standard error).
demo_tabs = map2_dfr(var_grid$Var1, var_grid$Var2, \(split_name, demo_name){
  split_chr = as.character(split_name)
  demo_chr = as.character(demo_name)
  split_col = as.symbol(split_chr)
  demo_col = as.symbol(demo_chr)

  complete_rows = filter(
    dta_profile,
    !is.na(!!demo_col),
    !is.na(!!split_col)
  )
  design = as_survey_design(complete_rows, ids = uid, weights = weight)

  design |>
    group_by(!!split_col, !!demo_col) |>
    summarise(prop = survey_prop(vartype = 'se')) |>
    rename(
      var_level = !!demo_col,
      group_level = !!split_col
    ) |>
    mutate(
      var = demo_chr,
      group = split_chr
    )
})

# ACS benchmarks (see Census tables).
# Age: population counts per ACS age bracket (`meta_cat`), each assigned to one
# of the three age bins used for the survey data (`var_level`). Counts are
# summed within bin and converted to shares of the overall total, leaving the
# columns group / var / var_level / prop.
acs_age = tribble(
  ~ group, ~ var, ~ meta_cat, ~ var_level, ~ n,
  "acs","age_bin","18-24", "18-39", 31254823,
  "acs","age_bin","25-29", "18-39", 22007628,
  "acs","age_bin","30-34", "18-39", 23225636,
  "acs","age_bin","35-39", "18-39", 22335859,
  "acs","age_bin","40-44", "40-64", 21711785,
  "acs","age_bin","45-49", "40-64", 19640925,
  "acs","age_bin","50-54", "40-64", 20835521,
  "acs","age_bin","55-59", "40-64", 20553615,
  "acs","age_bin","60-64", "40-64", 21573853,
  "acs","age_bin","65-69", "65+", 18559497,
  "acs","age_bin","70-74", "65+", 15338575,
  "acs","age_bin","75-79", "65+", 11005461,
  "acs","age_bin","80-84", "65+", 6758839,
  "acs","age_bin","85+",   "65+", 6159943
) |> 
  group_by(group, var, var_level) |> 
  # Drop the grouping explicitly so the share below is taken over all bins and
  # summarise() does not emit its regrouping message.
  summarise(n = sum(n), .groups = "drop") |> 
  mutate(prop = n / sum(n)) |> 
  select(group, var, var_level, prop)

# ACS benchmark shares for every demographic variable. The table below is
# entered in percent and rescaled to proportions; the age rows, already stored
# as proportions in `acs_age`, are appended afterwards.
acs_df = bind_rows(
  tribble(
    ~ group, ~ var,        ~ var_level,          ~ prop,
    # Sex
    "acs",   "sex",        "Male",               49.6,
    "acs",   "sex",        "Female",             50.4,
    # Race
    "acs",   "race",       "White",              57.7,
    "acs",   "race",       "Black",              11.9,
    "acs",   "race",       "Asian",              5.8,
    "acs",   "race",       "Two or More",        4.3,
    "acs",   "race",       "Hispanic",           19.1,
    "acs",   "race",       "Other",              1.3,
    # Education
    "acs",   "education",  "HS or Less",         36.5,
    "acs",   "education",  "Some College",       19.1,
    "acs",   "education",  "Associates",         8.8,
    "acs",   "education",  "Bachelor",           21.6,
    "acs",   "education",  "Post-Grad",          14,
    # Marital status
    "acs",   "married",    "Married",            48,
    "acs",   "married",    "Widowed",            5.5,
    "acs",   "married",    "Divorced",           10.5,
    "acs",   "married",    "Separated",          1.7,
    "acs",   "married",    "Never Married",      34.3,
    # Labor force status
    "acs",   "workforce",  "In Labor Force",     60.8,
    "acs",   "workforce",  "Unemployed",         2.7,
    "acs",   "workforce",  "Not in Labor Force", 36.5,
    # Household income
    "acs",   "income_bin", "< $50K",             34,
    "acs",   "income_bin", "$50-100K",           29,
    "acs",   "income_bin", "$100-200K",          25.6,
    "acs",   "income_bin", "> $200K",            11.5
  ) |>
    mutate(prop = prop / 100),
  acs_age
)

# Turn `var_level` into an ordered factor so the levels of a demographic
# variable are always drawn in the same order.
#
# Arguments:
#   data     - data frame with a `var_level` column holding the level labels.
#   variable - single name of the demographic variable whose ordering applies:
#              "sex", "race", "workforce", "education", "married",
#              "income_bin" or "age_bin".
#
# Returns `data` with `var_level` as an ordered factor whose levels are the
# display order listed below, reversed. For "sex" the data are returned
# unchanged. Any other `variable` raises an error naming the unsupported value
# (previously this surfaced as "object 'out' not found").
fix_levels = \(data, variable){
  # Display order per variable; reversed when the factor is built.
  display_order = list(
    race = c("White", "Black", "Hispanic",
             "Asian", "Two or More", "Other"),
    workforce = c("In Labor Force", "Unemployed",
                  "Not in Labor Force"),
    education = c("HS or Less", "Some College",
                  "Associates", "Bachelor",
                  "Post-Grad"),
    married = c("Married", "Widowed", "Divorced",
                "Separated", "Never Married"),
    income_bin = c("< $50K", "$50-100K",
                   "$100-200K", "> $200K"),
    age_bin = c("18-39", "40-64", "65+")
  )

  key = as.character(variable)
  if (length(key) != 1L || is.na(key)) {
    stop("`variable` must be a single, non-missing variable name.",
         call. = FALSE)
  }

  # No ordering is imposed on sex; the labels stay as they are.
  if (key == "sex") {
    return(data)
  }

  if (!key %in% names(display_order)) {
    stop("fix_levels(): no level ordering defined for variable '", key,
         "'. Supported variables: ",
         paste(c("sex", names(display_order)), collapse = ", "), ".",
         call. = FALSE)
  }

  level_order = rev(display_order[[key]])
  # `.env$` makes sure the local vector is used even if `data` happens to
  # contain a column with the same name.
  mutate(data,
         var_level = factor(var_level,
                            levels = .env$level_order,
                            ordered = TRUE))
}

# Stacked horizontal bar chart of the distribution of one demographic variable,
# with one bar per subgroup plus a bar for the ACS benchmark.
#
# Arguments:
#   data         - table of subgroup proportions with columns `group`,
#                  `group_level`, `var`, `var_level` and `prop`.
#   acs_data     - benchmark rows with columns `group`, `var`, `var_level`
#                  and `prop`.
#   variable     - name of the demographic variable to plot (matched against
#                  `var` and passed on to fix_levels()).
#   var_title    - legend title.
#   group_labels - named character vector mapping values of `group` to the
#                  labels shown on the y axis; groups without an entry become
#                  NA. Defaults to the Figure 3 labels.
#
# Returns a ggplot object.
plot_dists = \(data, acs_data, variable, var_title,
               group_labels = c(
                 "acs" = "US Pop. (ACS)",
                 "is_denier" = "Skeptic",
                 "maga_skeptic" = "MAGA Skeptic",
                 "non_maga_skeptic" = "Non-MAGA Skeptic"
               )){
  dat = data |> 
    # Keep only rows where the subgroup indicator equals 1 (or TRUE). This runs
    # before the benchmark rows are appended, since those carry no
    # `group_level`.
    filter(group_level == 1) |> 
    bind_rows(acs_data) |> 
    filter(var == variable) |>
    fix_levels(data = _, variable = variable) |> 
    mutate(group = unname(.env$group_labels[as.character(group)]))
  # Request one colour more than there are levels and drop the last one, so the
  # final colour of the viridis ramp is never used.
  npal = n_distinct(dat$var_level) + 1
  pal = (viridis_pal()(npal))[-npal]
  out = dat |> 
    ggplot(aes(x = prop, y = group, fill = var_level)) +
    geom_col() +
    # Reference line at 50%
    geom_vline(xintercept = .5, color = "grey35") +
    scale_fill_manual(name = var_title,
                      values = pal,
                      guide = guide_legend(reverse = TRUE)) +
    scale_x_continuous(labels = scales::percent) +
    labs(x = NULL, y = NULL) +
    theme_classic() +
    theme(legend.position = "bottom")
  return(out)
}

# One panel per demographic variable. guides() supplies a new fill legend for
# the race panel, replacing the reversed legend configured inside plot_dists(),
# so `reverse = TRUE` is restated here to keep the race legend in the same
# order as the other panels.
p_race = plot_dists(demo_tabs, acs_df, "race", "Race") +
  guides(fill = guide_legend(nrow = 1, byrow = TRUE, reverse = TRUE))
p_sex = plot_dists(demo_tabs, acs_df, "sex", "Gender")
p_edu = plot_dists(demo_tabs, acs_df, "education", "Education")
p_married = plot_dists(demo_tabs, acs_df, "married", "Marital Status")
p_work = plot_dists(demo_tabs, acs_df, "workforce", "Labor Status")
p_age = plot_dists(demo_tabs, acs_df, "age_bin", "Age")
p_income = plot_dists(demo_tabs, acs_df, "income_bin", "Household Income")

# Layout design: the first panel (race) spans the full top row; the remaining
# six panels fill areas B-G, two per row, in the order they are added below.
layout = c("
  AAAA
  BBCC
  DDEE
  FFGG"
)

fig3 = p_race + p_sex + p_edu + p_age + p_income + p_married + p_work +
  plot_layout(design = layout)
print(fig3)

ggsave(here("Figures","figure_3.pdf"), fig3,
       dpi = 600, height = 9, width = 14)

### Figure S4 ###

# Party-specific skeptic indicators (splits) and the demographic variables to
# tabulate for Figure S4.
vars_splits = c("r_skeptic", "d_skeptic", "i_skeptic")
vars = c(
  "race", "education", "married",
  "workforce", "sex", "income_bin", "age_bin"
)

# Every split crossed with every demographic variable; expand.grid() names the
# columns Var1 (split) and Var2 (demographic variable).
var_grid = expand.grid(vars_splits, vars)

# For each (split, variable) pair: drop rows missing either column, then
# estimate the weighted share of each demographic level within each value of
# the split. Results are stacked into one long table with standardised columns
# (group, group_level, var, var_level, prop and its standard error).
demo_tabs = map2_dfr(var_grid$Var1, var_grid$Var2, \(split_name, demo_name){
  split_chr = as.character(split_name)
  demo_chr = as.character(demo_name)
  split_col = as.symbol(split_chr)
  demo_col = as.symbol(demo_chr)

  complete_rows = filter(
    dta_profile,
    !is.na(!!demo_col),
    !is.na(!!split_col)
  )
  design = as_survey_design(complete_rows, ids = uid, weights = weight)

  design |>
    group_by(!!split_col, !!demo_col) |>
    summarise(prop = survey_prop(vartype = 'se')) |>
    rename(
      var_level = !!demo_col,
      group_level = !!split_col
    ) |>
    mutate(
      var = demo_chr,
      group = split_chr
    )
})

# Stacked horizontal bar chart of the distribution of one demographic variable,
# with one bar per party-specific skeptic subgroup plus a bar for the ACS
# benchmark (Figure S4 version).
#
# Arguments:
#   data         - table of subgroup proportions with columns `group`,
#                  `group_level`, `var`, `var_level` and `prop`.
#   acs_data     - benchmark rows with columns `group`, `var`, `var_level`
#                  and `prop`.
#   variable     - name of the demographic variable to plot (matched against
#                  `var` and passed on to fix_levels()).
#   var_title    - legend title.
#   group_labels - named character vector mapping values of `group` to the
#                  labels shown on the y axis; groups without an entry become
#                  NA. Defaults to the Figure S4 labels.
#
# Returns a ggplot object.
plot_dists = \(data, acs_data, variable, var_title,
               group_labels = c(
                 "acs" = "US Pop. (ACS)",
                 "r_skeptic" = "Rep. Skeptic",
                 "d_skeptic" = "Dem. Skeptic",
                 "i_skeptic" = "Ind. Skeptic"
               )){
  dat = data |> 
    # Keep only rows where the subgroup indicator equals 1 (or TRUE). This runs
    # before the benchmark rows are appended, since those carry no
    # `group_level`.
    filter(group_level == 1) |> 
    bind_rows(acs_data) |> 
    filter(var == variable) |>
    fix_levels(data = _, variable = variable) |> 
    mutate(group = unname(.env$group_labels[as.character(group)]))
  # Request one colour more than there are levels and drop the last one, so the
  # final colour of the viridis ramp is never used.
  npal = n_distinct(dat$var_level) + 1
  pal = (viridis_pal()(npal))[-npal]
  out = dat |> 
    ggplot(aes(x = prop, y = group, fill = var_level)) +
    geom_col() +
    # Reference line at 50%
    geom_vline(xintercept = .5, color = "grey35") +
    scale_fill_manual(name = var_title,
                      values = pal,
                      guide = guide_legend(reverse = TRUE)) +
    scale_x_continuous(labels = scales::percent) +
    labs(x = NULL, y = NULL) +
    theme_classic() +
    theme(legend.position = "bottom")
  return(out)
}

# One panel per demographic variable. guides() supplies a new fill legend for
# the race panel, replacing the reversed legend configured inside plot_dists(),
# so `reverse = TRUE` is restated here to keep the race legend in the same
# order as the other panels.
p_race = plot_dists(demo_tabs, acs_df, "race", "Race") +
  guides(fill = guide_legend(nrow = 1, byrow = TRUE, reverse = TRUE))
p_sex = plot_dists(demo_tabs, acs_df, "sex", "Gender")
p_edu = plot_dists(demo_tabs, acs_df, "education", "Education")
p_married = plot_dists(demo_tabs, acs_df, "married", "Marital Status")
p_work = plot_dists(demo_tabs, acs_df, "workforce", "Labor Status")
p_age = plot_dists(demo_tabs, acs_df, "age_bin", "Age")
p_income = plot_dists(demo_tabs, acs_df, "income_bin", "Household Income")

# Layout design: the first panel (race) spans the full top row; the remaining
# six panels fill areas B-G, two per row, in the order they are added below.
layout = c("
  AAAA
  BBCC
  DDEE
  FFGG"
)

fig_s4 = p_race + p_sex + p_edu + p_age + p_income + p_married + p_work +
  plot_layout(design = layout)
print(fig_s4)

ggsave(here("Figures","figure_s4.pdf"), fig_s4,
       dpi = 600, height = 9, width = 14)
